*Build the person-by-job panel used in the career analysis.
*Set the local dir to the folder holding the .dta inputs (no trailing slash).
local dir "YOUR PATH HERE"
*A forward slash is used as the separator: Stata accepts it on Windows, macOS
*and Unix, and it cannot be read as the backslash that escapes macro expansion.
use "`dir'/nlsy79_learning_panel.dta", clear
*Education groups: educ in 1-12 versus educ in 16-20 (missing educ gives 0 in both)
gen hs_less = inrange(educ, 1, 12)
gen ba_deg = inrange(educ, 16, 20)
*Actual work experience: total minus youth experience, capped at 39 and rounded
*to the nearest integer; missing values stay missing
gen work_exp = wkexp_yr_total - wkexp_yr_youth
replace work_exp = round(min(work_exp, 39)) if work_exp < .
*Two-year age-bin indicators: age_19to20, age_21to22, ..., age_59to60
forvalues lo = 19(2)59 {
	local hi = `lo' + 1
	gen age_`lo'to`hi' = inrange(age, `lo', `hi')
}
*BA-degree interactions for each bin from 21-22 through 59-60
*(the 19-20 bin gets no interaction)
forvalues lo = 21(2)59 {
	local hi = `lo' + 1
	gen ba_age_`lo'to`hi' = ba_deg * age_`lo'to`hi'
}
*Single-year experience indicators: workexp_1 ... workexp_39
forvalues yr = 1/39 {
	gen workexp_`yr' = (work_exp == `yr')
}
*Two-year experience bins: workexp_0to1, workexp_2to3, ..., workexp_38to39
forvalues lo = 0(2)38 {
	local hi = `lo' + 1
	gen workexp_`lo'to`hi' = inrange(work_exp, `lo', `hi')
}

*BA-degree interaction for every two-year experience bin
forvalues lo = 0(2)38 {
	local hi = `lo' + 1
	gen ba_workexp_`lo'to`hi' = ba_deg * workexp_`lo'to`hi'
}
*Bring the identifiers and the per-job source variables to the front of the dataset
order id year ///
	wage wage1 wage2 wage3 wage4 wage5 ///
	hrs1 hrs2 hrs3 hrs4 hrs5 ///
	occ occ1 occ2 occ3 occ4 occ5 ///
	occ1990dd occ1990dd1 occ1990dd2 occ1990dd3 occ1990dd4 occ1990dd5 ///
	tenure tenure1 tenure2 tenure3 tenure4 tenure5 ///
	cur_work1 cur_work2 cur_work3 cur_work4 cur_work5
*Declare the id-by-year panel and sort on it
tsset id year
sort id year
*Change in jobsnum relative to the same person's previous row
*(missing on each person's first row); a change of -1 is recoded to 0
by id: gen jobsnum_next = jobsnum - jobsnum[_n-1] if _n > 1
replace jobsnum_next = 0 if jobsnum_next == -1

*Reorder job variables chronologically, within wave.
*Step 1: empty placeholders stub_j1 ... stub_j5 for every job-level stub
foreach stub in wage hrs tenure occ occ1990dd cur_work {
	forvalues slot = 1/5 {
		gen `stub'_j`slot' = .
	}
}
*Step 2: even up missings. A job slot only counts when wage and tenure are both
*nonmissing, so blank every variable of a slot where either one is missing.
*wage and tenure come first in the list; once either is blanked the condition
*stays true for the remaining variables of that slot.
forvalues slot = 1/5 {
	foreach stub in wage tenure hrs cur_work occ occ1990dd {
		replace `stub'`slot' = . if wage`slot' == . | tenure`slot' == .
	}
}
*Step 3: number of job slots in each wave that satisfy the criteria
local wage_slots
forvalues slot = 1/5 {
	local wage_slots `wage_slots' wage`slot'
}
egen job_count = rownonmiss(`wage_slots')
*Fill the reordered slots. For a wave with job_count == n, slot s receives source
*job n-s+1, so slot 1 takes job n, slot 2 takes job n-1, ..., slot n takes job 1.
*Slots above n stay missing.
*NOTE(review): this assumes the valid jobs sit in source positions 1..job_count;
*a gap (e.g. job 1 blank, job 2 valid) would leave slot 1 missing - TODO confirm.
forvalues slot = 1/5 {
	foreach stub in hrs cur_work occ occ1990dd tenure wage {
		forvalues n = `slot'/5 {
			local src = `n' - `slot' + 1
			replace `stub'_j`slot' = `stub'`src' if job_count == `n'
		}
	}
}
*Subtract the measured wave-to-wave increase in jobs from the current job count:
*values greater than zero mean the respondent kept the same job across waves,
*zero or less means a switch
gen new_test = job_count - jobsnum_next

*Number of filled (reordered) job slots per year
egen jobs_yr = rownonmiss(wage_j1 wage_j2 wage_j3 wage_j4 wage_j5)
*Career year = year minus schl_max_full, assigned only for offsets -2 through 30
sort id year
gen career = .
forvalues t = -2/30 {
	replace career = `t' if year == schl_max_full + `t'
}
*Career-year indicators: single years for -2, -1, 1, 2 and two-year bins from
*3-4 through 29-30. No indicator is created for career year 0.
gen career_neg2 = (career == -2)
gen career_neg1 = (career == -1)
local career_bins career_neg2 career_neg1
forvalues t = 1/2 {
	gen career_`t' = (career == `t')
	local career_bins `career_bins' career_`t'
}
forvalues lo = 3(2)29 {
	local hi = `lo' + 1
	gen career_`lo'to`hi' = inrange(career, `lo', `hi')
	local career_bins `career_bins' career_`lo'to`hi'
}
*BA-degree interaction for every career-year indicator, in creation order
foreach v of local career_bins {
    gen ba_`v' = ba_deg * `v'
}
*Outcomes for the career-year probabilities of 1) same employer as last wave,
*2) new employer, 3) no employer.
*unemp is 1 when emp is 0; rows with emp equal to . (attrition) are set to missing
gen unemp = (emp == 0) if emp != .
*same_job is the complement of new_job, defined only where new_job is 0 or 1
gen same_job = 1 - new_job if inlist(new_job, 0, 1)

*Rescaled tenure and work experience; a zero wkexp_wk is treated as missing
gen tenure_yr = tenure / 52
gen wkexp_wk = wkexp_yr * 40
replace wkexp_wk = . if wkexp_wk == 0
*Occupation categories from occ1990dd code ranges. Every category is left
*missing on rows where occ1990dd is missing (.).
gen mgmt = inrange(occ1990dd, 4, 22) if occ1990dd != .
*Codes flagged as supervisors: superv is 1 for these codes and missing otherwise
gen superv = 1 if inlist(occ1990dd, 243, 303, 433, 448, 450, 470, 503, 558, 628, 803, 823) | inrange(occ1990dd, 413, 415)
*Supervisors are counted as management and excluded from the groups below
replace mgmt = 1 if superv == 1
gen prof = inrange(occ1990dd, 23, 235) if occ1990dd != .
gen white = (inrange(occ1990dd, 243, 389) & superv != 1) if occ1990dd != .
gen blue = (inrange(occ1990dd, 405, 889) & superv != 1) if occ1990dd != .
gen serv = (inrange(occ1990dd, 405, 498) & superv != 1) if occ1990dd != .
gen blue_noserv = (inrange(occ1990dd, 499, 889) & superv != 1) if occ1990dd != .
*Routineness: attach occupation-level variables by occ1990dd.
*Unmatched master rows are kept, using-only rows are dropped, and no _merge is created.
*Paths use a forward slash so the do-file also runs on macOS and Unix.
merge m:1 occ1990dd using "`dir'/routine_convert_2017.dta", keep(master match) nogenerate

*Merge occupation-by-age wage projections, keyed on age and first_occ
merge m:1 age first_occ using "`dir'/cps_occ_age_NLSYcohorts.dta", keep(master match) nogenerate
*Log wages; ln() returns missing for zero, negative or missing input
gen ln_wage=ln(wage)
gen ln_wage_firstocc=ln(wage_first_occ)

*Person is the panel unit for the fixed-effects models that follow
xtset id

*Table 1 - person fixed-effects regressions of each outcome on the career-year
*indicators, run first for hs_less and then for ba_deg, standard errors clustered on id
local career_rhs career_neg2-career_29to30
foreach depvar in jobsnum same_job wkexp_wk wage {
	foreach grp in hs_less ba_deg {
		xtreg `depvar' `career_rhs' if career!=. & `grp'==1 & age<=60, fe vce(cluster id)
	}
}

*Figure 5 and Table A4 - occupational sorting by career year
*Panel A is the hs_less sample and Panel B the ba_deg sample; each panel runs
*the four occupation-group outcomes in turn (ages 18-60, person fixed effects)
foreach grp in hs_less ba_deg {
	foreach occgrp in mgmt prof white blue {
		xtreg `occgrp' career_neg2-career_29to30 if career!=. & `grp'==1 & inrange(age, 18, 60), fe vce(cluster id)
	}
}

*Figure 6 - routine_2017 on the career-year indicators, hs_less sample first
*and ba_deg sample second (ages 18-60, person fixed effects, clustered on id)
foreach grp in hs_less ba_deg {
	xtreg routine_2017 career_neg2-career_29to30 if career!=. & `grp'==1 & inrange(age, 18, 60), fe vce(cluster id)
}

*Figure 8
*Mincerian return to education, actual vs. projected wages: log wage on the age
*bins and their BA interactions, ages 23-60 with work_exp between 0 and 39
local ba_age_rhs ba_age_25to26-ba_age_59to60
local age_rhs age_25to26-age_59to60
local fig8_sample inrange(age, 23, 60) & inrange(work_exp, 0, 39)
foreach depvar in ln_wage ln_wage_firstocc {
	xtreg `depvar' `ba_age_rhs' `age_rhs' if `fig8_sample', fe vce(cluster id)
}

*Now create job panel
*Reshape to one row per id-year-job slot; job indexes the reordered slot (1-5)
reshape long wage_j hrs_j occ_j occ1990dd_j tenure_j cur_work_j, i(id year) j(job)
*Blank the slot index on empty slots so job_max only reflects filled slots
replace job=. if wage_j==. | tenure_j==.
bysort id year: egen job_max=max(job)
egen job_yr=group(job year)
*Keep only slots with both wage and tenure
drop if wage_j==. | tenure_j==.

*Total jobs across all waves for each person. job_count is a wave-level value
*that reshape copied onto every job row, so summing it over all rows would count
*a wave with k jobs k times. Let one tagged row per id-year contribute instead.
tempvar wave_first
egen byte `wave_first'=tag(id year)
bysort id: egen jobs_all=total(job_count*`wave_first')
drop `wave_first'

*Assign a job ID within each person: starting with the first year, number the
*filled job rows in (year, job) order. The same number must be kept across
*waves when it is the same employer; hours and tenure are then summed per job.

*get rid of blanks first (no-op if they were already dropped)
drop if wage_j==. | tenure_j==.
*count unique jobs: running count within id, restarting at 1 for each person
gen jobID=1
sort id year job
*replace works down the rows in order, so jobID[_n-1] is the updated value
replace jobID=jobID[_n-1]+1 if id==id[_n-1]
*Same job across survey waves: carry the previous row's ID forward when
*emp_tenure increased. Two guards are required:
*  - id==id[_n-1]: otherwise a person's first row could inherit the last job ID
*    of the previous person and collide with that person's own later IDs
*  - emp_tenure<.: missing compares as larger than any number, so a missing
*    emp_tenure would otherwise always look like a continuing job
*NOTE(review): emp_tenure is not created in this file; presumably it is a
*wave-level tenure measure from the input data - confirm it is the intended one.
replace jobID=jobID[_n-1] if id==id[_n-1] & emp_tenure>emp_tenure[_n-1] & emp_tenure<.

*Total tenure and hours accumulated within each person-job
foreach x in tenure hrs {
	bysort id jobID: egen `x'_jobID=total(`x'_j)
}
*individual by employer FE and individual by employer by occ FE
*NOTE(review): the occupation cell uses the wave-level occ1990dd, not the
*job-level occ1990dd_j - confirm this is intended.
gen ln_wage_j=ln(wage_j)
egen id_job=group(id jobID)
egen id_job_occ=group(id jobID occ1990dd)

*Figure 4

*Panel A and Table A2 - by career year
*Compare overall wage growth (person fixed effects) with within-job wage growth
*(fixed effects for the id-by-job-by-occupation cell).
*Between job = Overall - within-job

*Run order: HS diploma or less, then BA or more. Within each group the person-FE
*model comes first and the job-cell-FE model second, each clustered on its own
*panel variable.
local careerA_rhs career_neg2-career_29to30
local sampleA career!=. & inrange(age, 19, 60) & inrange(work_exp, 0, 39) & id_job_occ!=.
foreach grp in hs_less ba_deg {
	foreach panelvar in id id_job_occ {
		xtset `panelvar'
		xtreg ln_wage_j `careerA_rhs' if `grp'==1 & `sampleA', fe vce(cluster `panelvar')
	}
}

*Panel B and Table A3 - repeat Panel A and Table A2, but with the two-year work
*experience bins as regressors instead of the career-year indicators

*Run order: HS diploma or less (ages 19-60), then BA or more (ages 23-60).
*Within each group: person fixed effects first, job-cell fixed effects second.
local expB_rhs workexp_2to3-workexp_38to39
foreach grp in hs_less ba_deg {
	local minage = cond("`grp'" == "ba_deg", 23, 19)
	foreach panelvar in id id_job_occ {
		xtset `panelvar'
		xtreg ln_wage_j `expB_rhs' if `grp'==1 & inrange(age, `minage', 60) & inrange(work_exp, 0, 39) & id_job_occ!=., fe vce(cluster `panelvar')
	}
}

*Figure A7 - just feed the results above into a figure and smooth the data with 5 year moving averages
